/*==============================================================================
This do file reads in a country's migration matrix (if available), and 
calculates inmigration and outmigration for all lower level NUTS regions.
For countries without a migration matrix (FI, DE), migration data is appended 
at whatever NUTS aggregation is available.

Outline: 
1) Create SE migration matrix
2) Create CH migration matrix by deflating lifetime migration matrix
3) Using a country's migration matrix, calculate outmigration for all possible
   NUTS levels. 
4) Using a country's migration matrix, calculate inmigration for all possible 
   NUTS levels
5) Code any composite regions that were originally in migration matrices.
6) Prepare DE (German data requires additional calculations) 
7) Combine all data

==============================================================================*/
clear
set more off

cd "$insheet_files/Migration/country_migration_matrices_csv_files"

*===============================================================================
* 1) Sweden
*===============================================================================

* Build SE_migration_matrix.csv from the raw Swedish table.

insheet using SE_migration_table.csv, clear

* County columns, spelled exactly as they appear in memory after insheet.
local se_counties stockholms uppsala sdermanlands stergtlands jnkpings ///
	kronobergs kalmars gotlands blekinge kristianstads malmhus hallands ///
	goteborgsbohus alvsborgs skaraborgs vrmlands rebro vstmanlands ///
	kopparbergs gavleborgs vsternorrlands jmtlands vsterbottens norrbotens

* Sum the flows so that there is one row per (nuts, direction) pair.
collapse (sum) `se_counties', by(nuts direction)

* Two sets of county columns are added together and carried under one code.
gen SE224 = kristianstads + malmhus
gen SE232 = goteborgsbohus + alvsborgs + skaraborgs
drop kristianstads malmhus goteborgsbohus alvsborgs skaraborgs

* Remaining county columns -> NUTS codes (old and new lists are position-matched).
rename (stockholms uppsala sdermanlands stergtlands rebro vstmanlands ///
	jnkpings kronobergs kalmars gotlands blekinge hallands ///
	vrmlands kopparbergs gavleborgs vsternorrlands jmtlands ///
	vsterbottens norrbotens) ///
	(SE110 SE121 SE122 SE123 SE124 SE125 ///
	SE211 SE212 SE213 SE214 SE221 SE231 ///
	SE311 SE312 SE313 SE321 SE322 ///
	SE331 SE332)

* Only the rows flagged "Out" form the matrix; the row code becomes the origin.
keep if direction=="Out"
drop direction
rename nuts nuts_start
gen nuts_level=3

outsheet using SE_migration_matrix.csv, comma replace

*===============================================================================
* 2) Switzerland 
*===============================================================================

* Step 1: inmigration totals by region (Table 7.12), kept aside in a tempfile.
clear
import excel using "CH Table 7.12 Resident Population by Place of Residence 1 and 5 Years Ago", cellrange(A8:M33) firstrow
keep nuts inmigration*

sort nuts
tempfile inmigration_1_5_yrs
save `inmigration_1_5_yrs.dta'

* Step 2: the combined matrix (Table 7.03, sheet "transpose"), with the 
* inmigration totals merged on by region code.
clear
import excel using "CH Table 7.03 Migration Matrix Combined.xlsx", cellrange(A1:AA26) sheet("transpose") firstrow
sort nuts
merge 1:1 nuts using  `inmigration_1_5_yrs.dta', nogen

* Step 3: rescale each CH* column. The factor is inmigration_1_yr of the row 
* whose code equals the column name, divided by the column total; the whole 
* column is then multiplied by that factor.
foreach canton of varlist CH* {
	tempvar col_total own_ratio factor
	egen `col_total' = total(`canton')
	* non-missing only on the row whose nuts code equals the column name
	quietly gen `own_ratio' = inmigration_1_yr/`col_total' if nuts=="`canton'"
	* spread that single value to every row
	egen `factor' = min(`own_ratio')
	quietly replace `canton' = `factor' * `canton'
	drop `col_total' `own_ratio' `factor'
}

* Keep the region code and the rescaled columns only.
keep nuts* CH* 
rename nuts nuts_start
outsheet using CH_migration_matrix.csv, comma replace

*===============================================================================
* 3) Using a country's migration matrix, calculate outmigration for all possible 
*   NUTS levels. 
*===============================================================================

clear
local country = "AT BE CH DK ES FR IT NL SE"

* Turn every country matrix (one row per origin, one column per destination) 
* into long form -- one row per origin/destination pair -- and stack all 
* countries in memory.
*
* Each matrix is reshaped in one call keyed on a row counter. Every matrix row 
* is its own reshape group, so nuts_start and nuts_level are simply carried 
* along and are not required to be unique.
*
* The stack is accumulated country by country starting from an empty file, so 
* each country enters exactly once and no country code is hard-coded outside 
* the list above.

tempfile stacked_flows
save `stacked_flows', emptyok

foreach co in `country' {
	insheet using `co'_migration_matrix.csv, case clear

	* one reshape group per matrix row
	gen long matrix_row = _n
	quietly reshape long `co', i(matrix_row) j(nuts_end) string
	drop matrix_row
	rename `co' migration

	* j() holds the column name without the country stub; put the two-letter 
	* prefix of the origin code back to obtain the destination code
	gen country=substr(nuts_start,1,2)
	replace nuts_end = country+nuts_end

	* add the countries processed so far and store the running total
	append using `stacked_flows'
	quietly save `stacked_flows', replace
}

* The data in memory now holds the long-form flows of every listed country.

* Remove exact duplicate origin/destination rows from the stacked flows and 
* keep a copy that every aggregation below (and the inmigration section) 
* starts from.
duplicates drop 

tempfile precollapse_migration
save `precollapse_migration.dta'

* Outmigration = flows summed by region of origin, excluding flows that stay 
* inside the region. Commands are deliberately NOT wrapped in -capture-: if a 
* collapse or rename fails, the script should stop rather than save 
* uncollapsed data.

*--- NUTS3: rows recorded at level 3 only ---------------------------------------
keep if nuts_level==3 

drop if nuts_start==nuts_end

collapse (sum) migration, by(nuts_start) //collapse by starting region, this is outmigration
rename nuts_start nuts
gen nuts_level=3

tempfile nuts3_outmigration
save `nuts3_outmigration.dta'

*--- NUTS2: countries whose highest recorded level is above 1 -------------------
use `precollapse_migration.dta', clear

bys country: egen max_nuts_level=max(nuts_level)
keep if max_nuts_level>1

* a NUTS2 code is the first four characters of the region code
gen nuts2_start= substr(nuts_start,1,4)
gen nuts2_end= substr(nuts_end,1,4)

drop if nuts2_start==nuts2_end

collapse (sum) migration, by(nuts2_start) 

rename nuts2_start nuts
gen nuts_level=2

tempfile nuts2_outmigration
save `nuts2_outmigration.dta'

*--- NUTS1: all rows ------------------------------------------------------------
use `precollapse_migration.dta', clear

* a NUTS1 code is the first three characters of the region code
gen nuts1_start= substr(nuts_start,1,3)
gen nuts1_end= substr(nuts_end,1,3)

drop if nuts1_start==nuts1_end

collapse (sum) migration, by(nuts1_start) 

rename nuts1_start nuts
gen nuts_level=1

*--- Stack the three levels -----------------------------------------------------
append using `nuts2_outmigration.dta'
append using `nuts3_outmigration.dta'

* drop rows repeating the same region code with the same total
duplicates drop nuts migration, force

rename migration outmigration

tempfile outmigration_all
save `outmigration_all.dta'

*4) Inmigration, same as above, except collapse by region you end up in. 
*
* Commands are deliberately NOT wrapped in -capture-: if a collapse or rename 
* fails, the script should stop rather than carry uncollapsed data into the 
* merge below. Variables are referred to by their full names.

*--- NUTS3: rows recorded at level 3 only ---------------------------------------
use `precollapse_migration.dta', clear

keep if nuts_level==3 

drop if nuts_start==nuts_end

collapse (sum) migration (first) nuts_level, by(nuts_end) //collapse by region you end up in, this is inmigration
rename nuts_end nuts

tempfile nuts3_inmigration
save `nuts3_inmigration.dta'

*--- NUTS2: countries whose highest recorded level is above 1 -------------------
use `precollapse_migration.dta', clear

bys country: egen max_nuts_level=max(nuts_level)
keep if max_nuts_level>1

* a NUTS2 code is the first four characters of the region code
gen nuts2_start= substr(nuts_start,1,4)
gen nuts2_end= substr(nuts_end,1,4)

drop if nuts2_start==nuts2_end

collapse (sum) migration, by(nuts2_end) 

rename nuts2_end nuts
gen nuts_level=2

tempfile nuts2_inmigration
save `nuts2_inmigration.dta'

*--- NUTS1: all rows ------------------------------------------------------------
use `precollapse_migration.dta', clear

* a NUTS1 code is the first three characters of the region code
gen nuts1_start= substr(nuts_start,1,3)
gen nuts1_end= substr(nuts_end,1,3)

drop if nuts1_start==nuts1_end

collapse (sum) migration, by(nuts1_end) 

rename nuts1_end nuts
gen nuts_level=1

*--- Stack the three levels -----------------------------------------------------
append using `nuts2_inmigration.dta'
append using `nuts3_inmigration.dta'

* drop rows repeating the same region code with the same total
duplicates drop nuts migration, force

rename migration inmigration

* Every region must have both an inflow and an outflow record; assert(3) 
* stops the script if any region is found on one side only.
merge 1:1 nuts using `outmigration_all.dta', assert(3) nogen

*===============================================================================
* 5) Code Composite regions in the above migration data  
*===============================================================================

* Some regions in the matrices stand for several NUTS regions combined. Their 
* code is replaced by a composite label that joins the member codes with "&".
* Every member is written as a full code, including the two-letter country 
* prefix.
* NOTE(review): the third member of the first ES composite is spelled "ES708" 
* here; confirm that any other file matching on this label uses the same 
* spelling.

replace nuts="CH021&CH025" if nuts=="CH021"
replace nuts="ITH1&ITH2" if nuts=="ITH1" 
replace nuts="NL21&NL23" if nuts=="NL21"
replace nuts="BE335&BE336" if nuts == "BE335"
replace nuts="ES705&ES704&ES708" if nuts=="ES705"
replace nuts="ES703&ES706&ES707&ES709" if nuts=="ES703"
replace nuts="ES630&ES640" if nuts=="ES630"

* Written to the working directory; appended to the other sources in section 7.
save migration_append.dta , replace

*===============================================================================
* 6) Germany 
*===============================================================================
*-------------------------------------------------------------------------------
* TABLE 10 has information of migration between landers NUTS1 that should be a 
* MINIMUM for the number of migrants from the region
*-------------------------------------------------------------------------------

clear
import excel using ///
	"$insheet_files/Migration/German Migration/Germany - Table 10.xls", ///
	firstrow cellrange(A8:M681)

* rows without a region code are not data rows
drop if nuts==""

* Composite codes treated as rough proxies for a single NUTS2 code of later 
* periods.
replace nuts="DE11" if nuts=="DE145&DE112&DE113&DE114&DE11C&DE117&DE118&DE119&DE115&DE11D&DE116&DE11A&DE111&DE144"
replace nuts="DE12" if nuts=="DE12B&DE122&DE123&DE11B&DE127&DE125&DE126&DE129"
replace nuts="DE13" if nuts=="DE147&DE132&DE138&DE133&DE131&DE139&DE134&DE136&DE13A&DE121&DE124"
replace nuts="DE14" if nuts=="DE146&DE148&DE149&DE141&DE143&DE142&DE137&DE135&DE12C&DE12A"
replace nuts="DE73" if nuts=="DE724&DE73"
replace nuts="DE71&DE72" if nuts=="DE71&DE721&DE722&DE723&DE725"

* Composites without a good single modern match: several source rows are 
* mapped to the same code, and their rates are averaged by the collapse 
* further down.
replace nuts="DE91" if inlist(nuts, ///
	"DE911&DE912&DE91B&DE917&DE91A&DE916", ///
	"DE911&DE912&DE91B&DE917", ///
	"DE91A&DE925&DE926&DE918&DE916&DE919&DE915")
replace nuts="DE92" if inlist(nuts, ///
	"DE922&DE923&DE927&DE928&DE929", ///
	"DE925&DE926&DE918&DE919&DE915")
replace nuts="DE93" if inlist(nuts, ///
	"DE931&DE93A&DE934&DE935&DE933&DE938&DE914&DE913", ///
	"DE932&DE939&DE937&DE93B&DE936")
replace nuts="DE94" if inlist(nuts, ///
	"DE944&DE94E&DE949&DE94B", ///
	"DE94C&DE947&DE942&DE94H", ///
	"DE94A&DE945&DE94G&DE946&DE943&DE94D&DE941&DE948&DE94F")
replace nuts="DEA2" if inlist(nuts, ///
	"DEA22&DEA23&DEA24&DEA27&DEA2A&DEA2B&DEA2C", ///
	"DEA2D&DEA29&DEA26&DEA28")

keep nuts net total_migration_rate inmigration_within_land ///
	outmigration_within_land inmigration_between_land ///
	outmigration_between_land

* force: entries that cannot be read as numbers become missing
destring total_migration_rate inmigration_within_land ///
	outmigration_within_land inmigration_between_land ///
	outmigration_between_land, replace force

* population base implied by the net figure and the rate, with the rate 
* divided by 1000 first
gen POP = net / (total_migration_rate/1000)

* Calculate LANDER TO LANDER migration, which should be a minimum for NUTS 2 
* migration 
gen outmigration_minimum = outmigration_between_land
gen inmigration_minimum  = inmigration_between_land

gen outmigration_rate_minimum = outmigration_minimum/ POP
gen inmigration_rate_minimum  = inmigration_minimum/ POP

* one row per code: the unweighted mean rate of the source rows mapped to it; 
* only nuts and the two rate variables remain afterwards
collapse (mean) inmigration_rate_minimum outmigration_rate_minimum, by(nuts)

sort nuts

save DE_migration_Table10, replace

*-------------------------------------------------------------------------------
* TABLE 11 -- Allows breakdown of migration within the Lander to other NUTS 2 
* regions. 
*-------------------------------------------------------------------------------

* germany_table_11.do is run from its own folder and leaves its result in 
* memory; the working directory is then switched back.
clear
cd "$insheet_files/Migration/German Migration/" 
do "germany_table_11.do"

cd "$insheet_files/Migration/country_migration_matrices_csv_files"

rename pop 		migration_pop_base_de
rename total_inflow 	inmigration
rename total_outflow 	outmigration

* copy on disk; the data stays in memory for the steps below
save de_total_flows, replace

* For the codes listed below, add a second row whose code is shortened to its 
* first three characters. The copy is identified by the marker that -expand- 
* creates, so only the freshly duplicated row is recoded and rows of other 
* regions are never touched.
foreach land in "DE30" "DE40" "DE50" "DE60" "DE80" "DEC0" "DEF0" "DEE0" { 
	expand 2 if nuts=="`land'", generate(is_copy)
	replace nuts = substr(nuts,1,3) if is_copy==1
	drop is_copy
}


** MERGE WITH TABLE 10 TO COMPARE LANDER TO LANDER MIGRATION
merge 1:1 nuts using DE_migration_Table10

* Table 10 minimum rates applied to the Table 11 population base
gen inmigration_minimum = migration_pop_base_de* inmigration_rate_minimum 
gen outmigration_minimum = migration_pop_base_de* outmigration_rate_minimum 

* Table 11 flows expressed as rates, for the comparison tables only
gen outmig_rate_t11= outmigration /  migration_pop_base_de
gen inmig_rate_t11= inmigration /  migration_pop_base_de

* List the regions where the Table 10 minimum exceeds the Table 11 flow
tabstat inmigration inmig_rate_t11 inmigration_rate_minimum inmigration_minimum if inmigration_minimum > inmigration & inmigration_minimum !=., by(nuts)
tabstat outmigration outmig_rate_t11 outmigration_rate_minimum outmigration_minimum if outmigration_minimum > outmigration & outmigration_minimum !=., by(nuts)

* ... and raise those flows to the minimum. The !=. test is required because 
* a missing value compares as larger than any number.
replace inmigration = inmigration_minimum if inmigration_minimum > inmigration & inmigration_minimum !=.
replace outmigration = outmigration_minimum if outmigration_minimum > outmigration & outmigration_minimum !=.

keep nuts migration_pop_base_de inmigration outmigration

tempfile DE_migration
save 	`DE_migration.dta'
*===============================================================================
* 7  Combine all data
*===============================================================================

* Start from the file for countries without a migration matrix, then stack 
* the matrix-based results, the UK file and the German flows underneath.
insheet using no_migration_matrix_data.csv, clear

append using migration_append.dta 
append using "$dta_files/uk_migration.dta"  
append using  `DE_migration.dta'

*===============================================================================
* Generate "migration_yrs" variable with the number of years the migration 
* occurred over 
*===============================================================================

* country = first two characters of the region code
gen country=substr(nuts,1,2)

* 1 year unless the country is listed below
gen migration_yrs = cond(country=="FR", 7+(1+(20/28))/12, ///
	cond(country=="ES", 4, ///
	cond(country=="AT", 5, 1)))

keep nuts outmigration inmigration migration_yrs migration_pop_base*

save "$dta_files/IC_EU_migration", replace


